/**********************************************************************/
/*
   Author: Michelle Han
   Created: October 11, 2022
   Description: Merges and cleans reshaped PMO data transfers:
		 1. pmo_b1-22_raw_wide.dta
		 2. pmo_b1-22_raw_add_vars_wide.dta
		 3. pmo_b1-22_raw_add_vars_baseline_wide.dta
		 4. pmo_b1-22_raw_add_vars_domicili_wide.dta

	 Outputs data at person level.
   Note that this code works with PII data (not deidentified).

   Note: to set filepaths, run MASTER.do.

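   Outputs:
   all_batches_raw_1-22.dta (merged raw data, saved before cleaning)
   pmo_b1-22_clean_wide_deid.dta (cleaned person-level data; saved only if the data signature check passes)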

*/
/**********************************************************************/

	* Load the filepath globals here when the global master_run is not "1"
	if "$master_run" != "1" {
		include "./Do/SET_FILEPATHS.do"
	}

	* Close any open log, then open a text log whose name starts with the
	* current date rendered with the CYND date format
	capture log close
	local datestamp: display %tdCYND td(`c(current_date)')
	log using "$KP_logs/`datestamp'_PMO_merge_wide_all.txt", replace text

/*----------------------------------------------------*/
            /* Section: Load Data*/
/*----------------------------------------------------*/

* Build the additional-variables file first: the add_vars transfer with the
* baseline transfer merged on (unmatched baseline-only rows are dropped)
	use "$KP_deid_admin/Clean/pmo_b1-22_raw_add_vars_wide.dta", clear
	merge 1:1 anon_id4 using "$KP_deid_admin/Clean/pmo_b1-22_raw_add_vars_baseline_wide.dta", keep(master match) nogenerate
	rename (win_batch final_test_score) (batch_treated_22 test_score)

	tempfile add_vars
	save `add_vars'

* Load the first transfer and move its conflicting variables to _old names
	use "$KP_deid_admin/Clean/pmo_b1-22_raw_wide.dta", clear
	foreach stub in has_passed_current_batch hh_applied hh_win_in_batch {
		rename `stub'* `stub'_old*
	}
	rename (win_batch first_apply_batch) (batch_treated_22 first_apply_batch_old)

* Combine the first transfer with the additional-variables file.
* The _merge variable created here is kept on purpose; it is renamed to
* data_source further down in this script.
	merge 1:1 anon_id4 using `add_vars'

* Bring in anon_prov_id from the domicili transfer; using values overwrite
* master values, and rows that exist only in the domicili file are dropped
	merge 1:1 anon_id4 using "$KP_deid_admin/Clean/pmo_b1-22_raw_add_vars_domicili_wide.dta", keepusing(anon_prov_id) update replace keep(master match match_update match_conflict) nogenerate

* Store the merged, not yet cleaned data
	save "$KP_deid_admin/Clean/all_batches_raw_1-22.dta", replace


	/*----------------------------------------------------*/
	            /* Section: Clean */
	/*----------------------------------------------------*/

	* Report the share of observations without a household ID
	count if missing(anon_hh_id)
	display `r(N)' / _N

	forvalues b = 1/22 {
		* Flag observations whose old and new win flags are both non-missing but differ
		generate has_passed_current_batch_diff`b' = ///
			(has_passed_current_batch`b' != has_passed_current_batch_old`b') ///
			& !missing(has_passed_current_batch`b', has_passed_current_batch_old`b')
		tabulate has_passed_current_batch_diff`b'

		* Where the new transfer has no value, fall back on the value from the old transfer
		replace has_passed_current_batch`b' = has_passed_current_batch_old`b' if missing(has_passed_current_batch`b')
	}

* Individual-level win measures.
* ever_win_17 and ever_win_22 are row totals of the batch win flags (rowtotal
* treats missing flags as zero); ever_win_39 is 1 when batch_treated_39 is
* non-missing and 0 otherwise.
* NOTE(review): the hyphenated ranges depend on the variable order in the
* dataset; confirm that only the intended batch flags sit inside each range.
	egen ever_win_17 = rowtotal(has_passed_current_batch1-has_passed_current_batch17)
	egen ever_win_22 = rowtotal(has_passed_current_batch1-has_passed_current_batch22)
	generate ever_win_39 = !missing(batch_treated_39)
	table ever_win_17 ever_win_22 ever_win_39

* Household-level maxima of the individual measures.
* NOTE(review): observations with a missing anon_hh_id presumably end up in a
* single by-group here; confirm that this is intended.
	foreach k in 17 22 {
		egen hh_ever_win_`k' = max(ever_win_`k'), by(anon_hh_id)
	}
	table hh_ever_win_17 hh_ever_win_22

* Fill first_apply_batch from the original transfer where the updated value is missing
	replace first_apply_batch = first_apply_batch_old if missing(first_apply_batch)

* Per batch: applicants with a missing win flag are set to 0, then the
* household-level flag is the maximum of the individual flags in the household
	forvalues b = 1/22 {
		replace has_passed_current_batch`b' = 0 if applied`b' == 1 & missing(has_passed_current_batch`b')
		egen hh_win_in_batch`b' = max(has_passed_current_batch`b'), by(anon_hh_id)
	}

* Keep the merge indicator under the name data_source and attach value labels
	rename _merge data_source
	label define data_source ///
		1 "Original Data Only" ///
		2 "Additional Variables Data Transfer Only" ///
		3 "Match"
	label values data_source data_source

* Switch every variable that starts with has_passed_current_batch to the
* win_in_batch stub (this also covers the _old and _diff versions)
	rename has_passed_current_batch* win_in_batch*

* Label variables
* Fix relative to the earlier version: the status_revoked label said
* "imcompliance"; it now reads "noncompliance". All other label text is unchanged.
* NOTE(review): several labels below are longer than 80 characters, which is
* the documented limit for variable labels; confirm they are stored as intended.
* NOTE(review): the aaa20 label has a comma after 2021 that the other
* semester 1 labels do not have; left as is.
	label variable ever_win_17 "Ever Won Prakerja, by Batch 17 (updated data)"
	label variable ever_win_22 "Ever Won Prakerja, by Batch 22 (updated data)"
	label variable ever_win_39 "Ever Won Prakerja, by Batch 39 (updated data)"
	label variable first_apply_batch "Batch of First Application (updated data)"
	label variable first_apply_batch_old "Batch of First Application (original data)"
	label variable year_dob "Year of Birth"
	label variable anon_month_dob "Month of Birth"
	label variable gender "Gender"
	label variable test_score "Score on Skills Test"
	label variable education "Education Level"

* Registration survey questions, 2020 registrants
	label variable aaa1 "[For 2020 registrants] Are you unemployed at the moment?"
	label variable aaa2 "[For 2020 registrants] Are you an employee?"
	label variable aaa3 "[For 2020 registrants] Have you ever worked before?"
	label variable aaa4 "[For 2020 registrants] Are you unemployed due to Covid-19?"
	label variable aaa5 "[For 2020 registrants] Do you have job contract?"
	label variable aaa6 "[For 2020 registrants] Are you self-employed?"
	label variable aaa7 "[For 2020 registrants] Covid-19 impact: decrease revenue, less customer?"
	label variable aaa8 "[For 2020 registrants] Is your business closed temporarily due to government advice?"
	label variable aaa9 "[For 2020 registrants] Is your business closed temporarily because you cannot pay your employee?"

* Registration survey questions, 2021 semester 1 registrants
	label variable aaa20 "[For 2021, semester 1 registrants] Are you unemployed at the moment?"
	label variable aaa21 "[For 2021 semester 1 registrants] Are you an employee?"
	label variable aaa22 "[For 2021 semester 1 registrants] Have you ever worked before?"
	label variable aaa23 "[For 2021 semester 1 registrants] Are you self-employed?"
	label variable aaa24 "[For 2021 semester 1 registrants] Do you have job contract?"
	label variable aaa25 "[For 2021 semester 1 registrants] Are you unemployed due to Covid-19?"
	label variable aaa26 "[For 2021 semester 1 registrants] Covid-19 impact: decrease revenue, less customer?"
	label variable aaa27 "[For 2021 semester 1 registrants] Does your working hour decrease due to Covid-19?"

* Registration survey questions, 2021 semester 2 registrants
	label variable aaa31 "[For 2021 semester 2 registrants] Are you unemployed at the moment?"
	label variable aaa32 "[For 2021 semester 2 registrants] Are you an employee?"
	label variable aaa33 "[For 2021 semester 2 registrants] Have you ever worked before?"
	label variable aaa34 "[For 2021 semester 2 registrants] Are you self-employed?"
	label variable aaa35 "[For 2021 semester 2 registrants] Do you have job contract?"
	label variable aaa36 "[For 2021 semester 2 registrants] Are you unemployed due to Covid-19?"
	label variable aaa37 "[For 2021 semester 2 registrants] Covid-19 impact: decrease revenue, less customer?"
	label variable aaa38 "[For 2021 semester 2 registrants] Does your working hour decrease due to Covid-19?"

* Identifiers, treatment timing and provenance
	label variable anon_prov_id "Province Code (updated data)"
	label variable anon_hh_id "Household ID"
	label variable batch_treated_22 "Batch that Respondent Won Prakerja (original data, up to batch 22)"
	label variable batch_treated_39 "Batch that Respondent Won Prakerja (updated data, up to batch 39)"
	label variable status_revoked "Flag for having treatment status revoked due to noncompliance (updated data)"
	label variable date_incentive "Date Incentive was Received (updated data)"
	label variable date_batch "Date of Batch Announcement (updated data)"
	label variable data_source "Data Source: original transfer, additional variable transfer, or both"

* Batch-specific variables, one set per batch 1 to 22
	forvalues b = 1/22 {
		label variable applied`b' "Applied in Batch `b'"
		label variable win_in_batch_old`b' "Treated in Batch `b' (original data)"
		label variable win_in_batch`b' "Treated in Batch `b' (updated data)"
		label variable hh_applied_old`b' "At Least One Other Household Member Applied in Batch `b' (original data)"
		label variable hh_win_in_batch_old`b' "Household Treated in Batch `b' (original data)"
		label variable hh_win_in_batch`b' "Household Treated in Batch `b' (updated data)"
	}

* Save cleaned data, but only when the data signature of the dataset in memory
* equals the expected reference value hard-coded below.
* On a mismatch the script prints the computed signature, closes the log and
* aborts with return code 459. The earlier version called "stop", which is not
* a Stata command, so it aborted only through an unrecognized-command error and
* left the log open.
	datasignature
	local sig "`r(datasignature)'"

	if "`sig'" == "23434701:222(91828):260599919:623665136" {
		save "$KP_deid_admin/Clean/pmo_b1-22_clean_wide_deid.dta", replace
	}
	else {
		display as error "Careful, your machine produces a different dataset"
		display as error "Computed data signature: `sig'"
		capture log close
		exit 459
	}

	capture log close


// DONE
